package Java工具收集;

import java.io.File;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.alibaba.fastjson.JSON;

import common.CommonPage;
import common.CommonResult;
import lombok.Data;

/**
 * @function lucene查询工具类
 * @author 肖荣辉
 * @date 2021年7月2日
*/
public class LuceneUtils {
	
	public final static String REPOSITORY_DATASTORE_INDEX_HOME = "/MyJava/luceneIndex";
	
	/**
	 * @function Builds a {@link Highlighter} that wraps every highlighted term in a
	 *           {@code <span class="lucene_highlight">} element.
	 * @param query query handed to the {@link QueryScorer} that scores fragments
	 * @return a new highlighter using a {@link SimpleHTMLFormatter} and a {@link QueryScorer}
	 * @author 肖荣辉
	 * @date 2021-07-02
	 */
	public static Highlighter createSpanHighlighter(Query query) {

		// Markup emitted before and after each highlighted term.
		final String openTag = "<span class=\"lucene_highlight\">";
		final String closeTag = "</span>";

		return new Highlighter(new SimpleHTMLFormatter(openTag, closeTag), new QueryScorer(query));
	}
	 
	/**
	 * @function Prints each document to standard output, one line per document, as
	 *           {@code name : value} pairs joined by {@code " , "}.
	 * @param docList documents to print; {@code null} prints nothing
	 * @author 肖荣辉
	 * @date 2021-07-02
	 */
	public static void printDocs(List<Document>  docList) {

		// A missing list simply has nothing to print (previously a NullPointerException).
		if (docList == null) {
			return;
		}

		final String separator = " , ";

		for (Document doc : docList) {

			StringBuilder sb = new StringBuilder();

			doc.forEach(field -> {
				// Emit the separator only between pairs; the old "append then substring(2)"
				// approach removed two of the three separator characters and left a
				// stray leading space on every line.
				if (sb.length() > 0) {
					sb.append(separator);
				}
				// Read the value from the field itself: Document.get(name) returns the
				// first value stored under that name, so a multi-valued field used to
				// print its first value once per occurrence.
				sb.append(field.name()).append(" : ").append(field.stringValue());
			});

			System.out.println(sb);
		}

	}
	
	/**
	 * @function Prints a paged {@link FileInfoDTO} search result to standard output:
	 *           first the total reported by the page, then one line per entry with its
	 *           score, id, file name, content and read-permission fields.
	 * @param commonResult search result whose data page is printed
	 * @author 肖荣辉
	 * @date 2021-07-02
	 */
	public static void printFileInfoDTORst(CommonResult<CommonPage<FileInfoDTO>> commonResult) {

		System.out.println("本次搜索到共" + commonResult.getData().getTotal() + "条数据");

		List<FileInfoDTO> rows = commonResult.getData().getList();

		// A page without a list has only the summary line.
		if (rows != null) {
			rows.forEach(row -> {
				StringBuilder line = new StringBuilder();
				line.append("score : ").append(row.getScore());
				line.append(" ,  id : ").append(row.getId());
				line.append(" , fileName : ").append(row.getFileName());
				line.append(" , content : ").append(row.getContent());
				line.append(" , role : ").append(row.getReadPermissionRoleNos());
				line.append(" , userIds : ").append(row.getReadPermissionUserIds());
				System.out.println(line.toString());
			});
		}

	}
	
	/**
	 * @function Prints a paged {@link Document} search result to standard output:
	 *           the total reported by the page, followed by the documents themselves
	 *           via {@link #printDocs(List)}.
	 * @param commonResult search result whose data page is printed
	 * @author 肖荣辉
	 * @date 2021-07-02
	 */
	public static void printDocs(CommonResult<CommonPage<Document>> commonResult) {

		String summary = "本次搜索到共" + commonResult.getData().getTotal() + "条数据";
		System.out.println(summary);

		List<Document> pageDocs = commonResult.getData().getList();

		// Only delegate when the page actually carries a list.
		if (pageDocs != null) {
			printDocs(pageDocs);
		}

	}
	
	/**
	 * @function Creates or replaces the index entry of a single file in the index stored
	 *           under {@link #REPOSITORY_DATASTORE_INDEX_HOME}. The {@code id} field is the
	 *           unique key: an existing document with the same id is replaced, otherwise
	 *           a new one is added. Only non-null DTO properties are written as fields.
	 * @param dto file information to index; {@code null} is ignored
	 * @throws IllegalArgumentException if {@code dto.getId()} is {@code null}
	 * @throws Exception if the index directory cannot be opened or written
	 * @author 肖荣辉
	 * @date 2021-07-01
	 */
	public  synchronized static void createIndex(FileInfoDTO dto ) throws Exception{

		if (dto == null) {
			return;
		}

		// The id is the update key used below. Reject a missing id before any index
		// resource is opened; previously the failure happened inside Lucene after the
		// writer (and its write lock) had already been acquired and was never released.
		if (dto.getId() == null) {
			throw new IllegalArgumentException("FileInfoDTO.id must not be null: it is the unique key of the index entry");
		}

		Document doc = new Document();

		// StringField: the id is indexed as a single, un-tokenized term.
		doc.add(new StringField("id", dto.getId(), Field.Store.YES));

		// Everything else is stored and indexed as analyzed text.
		if (dto.getFileName() != null) {
			doc.add(new TextField("fileName", dto.getFileName(), Field.Store.YES));
		}

		if (dto.getContent() != null) {
			doc.add(new TextField("content", dto.getContent(), Field.Store.YES));
		}

		if (dto.getSize() != null) {
			doc.add(new TextField("size", dto.getSize() , Field.Store.YES));
		}

		if (dto.getReadPermissionRoleNos() != null) {
			doc.add(new TextField("readPermissionRoleNos", dto.getReadPermissionRoleNos(), Field.Store.YES));
		}

		if (dto.getReadPermissionUserIds() != null) {
			doc.add(new TextField("readPermissionUserIds", dto.getReadPermissionUserIds(), Field.Store.YES));
		}

		if (dto.getMimeType() != null) {
			doc.add(new TextField("mimeType", dto.getMimeType(), Field.Store.YES));
		}

		if (dto.getSuffix() != null) {
			doc.add(new TextField("suffix", dto.getSuffix(), Field.Store.YES));
		}

		if (dto.getCurrentVersionFlag() != null) {
			doc.add(new TextField("currentVersionFlag", dto.getCurrentVersionFlag() , Field.Store.YES));
		}

		// try-with-resources closes the writer and then the directory even when the
		// update or commit throws, so a failed call no longer leaves the writer open.
		try (Directory directory = FSDirectory.open(new File(REPOSITORY_DATASTORE_INDEX_HOME).toPath());
				IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(new IKAnalyzer()))) {

			// Delete any document with this id, then add the new one.
			writer.updateDocument(new Term("id" , dto.getId()) , doc);

			writer.commit();
		}

	}
	
	 /**
	 * @function Deletes every indexed document whose field {@code uniqueFieldName}
	 *           contains exactly the term {@code uniqueFieldValue}.
	 * @param uniqueFieldName name of the field identifying the documents to delete
	 * @param uniqueFieldValue term value to match; all matching documents are removed
	 * @throws IllegalArgumentException if either argument is {@code null}
	 * @throws Exception if the index directory cannot be opened or written
	 * @author 肖荣辉
	 * @date 2021-07-01
	 */
	 public void deleteIndex(String uniqueFieldName , String uniqueFieldValue) throws Exception {

		// Fail fast, before any index resource is opened.
		if (uniqueFieldName == null || uniqueFieldValue == null) {
			throw new IllegalArgumentException("uniqueFieldName and uniqueFieldValue must not be null");
		}

		// createIndex is a static synchronized method, i.e. it holds the LuceneUtils class
		// lock while its IndexWriter is open. Take the same lock here so that the two
		// methods never try to open a writer on the same directory at the same time.
		synchronized (LuceneUtils.class) {

			// try-with-resources closes the writer and then the directory even when the
			// delete or commit throws.
			try (Directory directory = FSDirectory.open(new File(REPOSITORY_DATASTORE_INDEX_HOME).toPath());
					IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(new IKAnalyzer()))) {

				writer.deleteDocuments(new Term(uniqueFieldName , uniqueFieldValue));

				writer.commit();
			}
		}
	 }
  	 
	/**
	 * @function Replaces the indexed document(s) identified by a unique field. The term
	 *           is built from {@code uniqueFieldName} and the value {@code doc} holds for
	 *           that field; every document matching the term is replaced by {@code doc}.
	 * @param doc new document data; must contain a value for {@code uniqueFieldName}
	 * @param uniqueFieldName name of the field used as the unique key
	 * @throws IllegalArgumentException if an argument is {@code null} or {@code doc} has no
	 *         value for {@code uniqueFieldName}
	 * @throws Exception if the index directory cannot be opened or written
	 * @author 肖荣辉
	 * @date 2021-07-01
	 */
	 public static synchronized void modifyIndex(Document doc , String uniqueFieldName) throws Exception{

		if (doc == null || uniqueFieldName == null) {
			throw new IllegalArgumentException("doc and uniqueFieldName must not be null");
		}

		// Resolve the key before opening the index so that a document lacking the key
		// field is rejected without ever acquiring the writer.
		String uniqueFieldValue = doc.get(uniqueFieldName);

		if (uniqueFieldValue == null) {
			throw new IllegalArgumentException("doc has no value for unique field '" + uniqueFieldName + "'");
		}

		// The method is synchronized on the class, like createIndex, because both open an
		// IndexWriter on the same directory. try-with-resources closes the writer and
		// then the directory even when the update or commit throws.
		try (Directory directory = FSDirectory.open(new File(REPOSITORY_DATASTORE_INDEX_HOME).toPath());
				IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(new IKAnalyzer()))) {

			writer.updateDocument(new Term(uniqueFieldName , uniqueFieldValue) , doc);

			writer.commit();
		}

		System.out.println("修改索引成功!");
	 }
	
	 /**
	 * @function Paged term search over several fields: a document matches when the exact
	 *           term {@code keywords} occurs in at least one of {@code fieldNames}. The
	 *           keyword is used as a single term and is not analyzed.
	 * @param keywords term to look for
	 * @param pageNum 1-based page number
	 * @param pageSize number of documents per page
	 * @param fieldNames fields to search
	 * @return the requested page of stored documents together with the hit count
	 * @throws IllegalArgumentException if {@code pageNum} or {@code pageSize} is less than 1
	 * @throws ArithmeticException if the page window does not fit in an {@code int}
	 * @throws Exception if the index cannot be opened or read
	 * @author 肖荣辉
	 * @date 2021-07-01
	 */
	public static CommonResult<CommonPage<Document>> multiFieldTermSearch(String keywords  , int pageNum , int pageSize , String... fieldNames) throws Exception {

		// Non-positive paging values used to produce a negative array index or an
		// opaque error from the searcher; reject them up front.
		if (pageNum < 1 || pageSize < 1) {
			throw new IllegalArgumentException("pageNum and pageSize must be >= 1, got pageNum=" + pageNum + ", pageSize=" + pageSize);
		}

		// [start, end) is the window of the requested page within the ranked hits.
		// The exact arithmetic traps overflow instead of silently wrapping around.
		int start = Math.multiplyExact(pageNum - 1, pageSize);
		int end = Math.addExact(start, pageSize);

		// OR across the fields: every field contributes an optional term clause.
		BooleanQuery.Builder builder = new BooleanQuery.Builder();

		for (String fieldName : fieldNames) {
			builder.add(new TermQuery(new Term(fieldName , keywords)) , BooleanClause.Occur.SHOULD);
		}

		BooleanQuery query = builder.build();

		// The directory and reader were never closed before; try-with-resources releases
		// them on every path. Documents are loaded before the reader is closed.
		try (Directory directory = FSDirectory.open(new File(REPOSITORY_DATASTORE_INDEX_HOME).toPath());
				IndexReader reader = DirectoryReader.open(directory)) {

			IndexSearcher searcher = new IndexSearcher(reader);

			// Fetch the top [0, end) hits; the page is the tail of that range.
			TopDocs topDocs = searcher.search(query, end);

			ScoreDoc[] scoreDocs = topDocs.scoreDocs;

			// NOTE(review): depending on TotalHits.relation this may be a lower bound
			// rather than the exact count - confirm for the Lucene version in use.
			int totalHits = (int) topDocs.totalHits.value;

			// Bound the window by what was actually returned, not by the reported total.
			int last = Math.min(end, scoreDocs.length);

			List<Document> docList = new ArrayList<Document>();

			for (int i = start; i < last; i++) {
				docList.add(reader.document(scoreDocs[i].doc));
			}

			return CommonResult.success(CommonPage.create(pageNum , pageSize , docList , totalHits));
		}
	}
		
	/**
	 * @function Paged analyzed search over several fields: {@code keywords} is parsed by a
	 *           {@link MultiFieldQueryParser} using an {@link IKAnalyzer}, so it is
	 *           tokenized and may match in any of {@code fieldNames}. The text is parsed
	 *           as query syntax and is not escaped.
	 * @param keywords query text
	 * @param pageNum 1-based page number
	 * @param pageSize number of documents per page
	 * @param fieldNames fields to search
	 * @return the requested page of stored documents together with the hit count
	 * @throws IllegalArgumentException if {@code pageNum} or {@code pageSize} is less than 1
	 * @throws ArithmeticException if the page window does not fit in an {@code int}
	 * @throws Exception if the query cannot be parsed or the index cannot be read
	 * @author 肖荣辉
	 * @date 2021-07-01
	 */
	public static CommonResult<CommonPage<Document>> multiFieldAnalysisSearch(String keywords  , int pageNum , int pageSize , String... fieldNames) throws Exception{

		// Non-positive paging values used to produce a negative array index or an
		// opaque error from the searcher; reject them up front.
		if (pageNum < 1 || pageSize < 1) {
			throw new IllegalArgumentException("pageNum and pageSize must be >= 1, got pageNum=" + pageNum + ", pageSize=" + pageSize);
		}

		// [start, end) is the window of the requested page within the ranked hits.
		// The exact arithmetic traps overflow instead of silently wrapping around.
		int start = Math.multiplyExact(pageNum - 1, pageSize);
		int end = Math.addExact(start, pageSize);

		// Analyzer, directory and reader were never closed before; try-with-resources
		// releases all three on every path, including a failed parse.
		try (IKAnalyzer analyzer = new IKAnalyzer();
				Directory directory = FSDirectory.open(new File(REPOSITORY_DATASTORE_INDEX_HOME).toPath());
				IndexReader reader = DirectoryReader.open(directory)) {

			// One parser over all fields: the keyword may occur in any of them.
			QueryParser queryParser = new MultiFieldQueryParser(fieldNames , analyzer);

			Query query = queryParser.parse(keywords);

			IndexSearcher searcher = new IndexSearcher(reader);

			// Fetch the top [0, end) hits; the page is the tail of that range.
			TopDocs topDocs = searcher.search(query, end);

			// NOTE(review): depending on TotalHits.relation this may be a lower bound
			// rather than the exact count - confirm for the Lucene version in use.
			int totalHits = (int) topDocs.totalHits.value;

			ScoreDoc[] scoreDocs = topDocs.scoreDocs;

			// Bound the window by what was actually returned, not by the reported total.
			int last = Math.min(end, scoreDocs.length);

			List<Document> docList = new ArrayList<Document>();

			for (int i = start; i < last; i++) {
				docList.add(reader.document(scoreDocs[i].doc));
			}

			return CommonResult.success(CommonPage.create(pageNum , pageSize , docList , totalHits));
		}
	}
	
	 /**
	 * @function Paged term search over several fields with highlighting: a document
	 *           matches when the exact term {@code keywords} occurs in at least one of
	 *           {@code fieldNames}. Each hit is returned as a {@link FileInfoDTO} whose
	 *           {@code highlightContent} / {@code highlightFileName} hold the best
	 *           highlighted fragment, or the plain stored value when nothing is
	 *           highlighted. A document without a stored value leaves them {@code null}.
	 * @param keywords term to look for
	 * @param pageNum 1-based page number
	 * @param pageSize number of entries per page
	 * @param fieldNames fields to search
	 * @return the requested page of results together with the hit count
	 * @throws IllegalArgumentException if {@code pageNum} or {@code pageSize} is less than 1
	 * @throws ArithmeticException if the page window does not fit in an {@code int}
	 * @throws Exception if the index cannot be opened or read, or highlighting fails
	 * @author 肖荣辉
	 * @date 2021-07-01
	 */
	public static CommonResult<CommonPage<FileInfoDTO>> multiFieldTermSearchWithHightlight(String keywords  , int pageNum , int pageSize , String... fieldNames) throws Exception {

		// Non-positive paging values used to produce a negative array index or an
		// opaque error from the searcher; reject them up front.
		if (pageNum < 1 || pageSize < 1) {
			throw new IllegalArgumentException("pageNum and pageSize must be >= 1, got pageNum=" + pageNum + ", pageSize=" + pageSize);
		}

		// [start, end) is the window of the requested page within the ranked hits.
		// The exact arithmetic traps overflow instead of silently wrapping around.
		int start = Math.multiplyExact(pageNum - 1, pageSize);
		int end = Math.addExact(start, pageSize);

		// OR across the fields: every field contributes an optional term clause.
		BooleanQuery.Builder builder = new BooleanQuery.Builder();

		for (String fieldName : fieldNames) {
			builder.add(new TermQuery(new Term(fieldName , keywords)) , BooleanClause.Occur.SHOULD);
		}

		BooleanQuery query = builder.build();

		// Directory, reader and analyzer were never closed before; try-with-resources
		// releases all three on every path.
		try (Directory directory = FSDirectory.open(new File(REPOSITORY_DATASTORE_INDEX_HOME).toPath());
				IndexReader reader = DirectoryReader.open(directory);
				IKAnalyzer ikAnalyzer = new IKAnalyzer()) {

			IndexSearcher searcher = new IndexSearcher(reader);

			// Fetch the top [0, end) hits; the page is the tail of that range.
			TopDocs topDocs = searcher.search(query, end);

			ScoreDoc[] scoreDocs = topDocs.scoreDocs;

			// NOTE(review): depending on TotalHits.relation this may be a lower bound
			// rather than the exact count - confirm for the Lucene version in use.
			int totalHits = (int) topDocs.totalHits.value;

			// Bound the window by what was actually returned, not by the reported total.
			int last = Math.min(end, scoreDocs.length);

			List<FileInfoDTO> fileInfoDTOList = new ArrayList<FileInfoDTO>();

			Highlighter highlighter = createSpanHighlighter(query);

			for (int i = start; i < last; i++) {

				Document doc = reader.document(scoreDocs[i].doc);

				FileInfoDTO dto = new FileInfoDTO();

				dto.setId(doc.get("id"));
				dto.setFileName(doc.get("fileName"));
				dto.setSize(doc.get("size"));
				dto.setReadPermissionRoleNos(doc.get("readPermissionRoleNos"));
				dto.setReadPermissionUserIds(doc.get("readPermissionUserIds"));
				dto.setMimeType(doc.get("mimeType"));
				dto.setSuffix(doc.get("suffix"));

				// createIndex skips null properties, so a stored value may be absent.
				// Highlighting null text throws, hence the guards (the permission-aware
				// variant of this search already guards the same way).
				String content = doc.get("content");

				if (content != null) {
					String highlightContent = highlighter.getBestFragment(ikAnalyzer, "content", content);
					// No fragment means no term matched in this field: fall back to the plain text.
					dto.setHighlightContent(highlightContent == null ? content : highlightContent);
				}

				String fileName = doc.get("fileName");

				if (fileName != null) {
					String highlightFileName = highlighter.getBestFragment(ikAnalyzer, "fileName", fileName);
					dto.setHighlightFileName(highlightFileName == null ? fileName : highlightFileName);
				}

				fileInfoDTOList.add(dto);
			}

			return CommonResult.success(CommonPage.create(pageNum , pageSize , fileInfoDTOList , totalHits));
		}
	}
	
	
	
	 /**
	 * @function Permission-aware paged term search with highlighting. A document is
	 *           returned only when ALL of the following hold:
	 *           (1) {@code readPermissionUserIds} contains the term {@code userId} or
	 *           {@code readPermissionRoleNos} contains one of {@code roleNoList};
	 *           (2) the exact term {@code keywords} occurs in at least one of
	 *           {@code fieldNames} (the two permission fields are ignored there);
	 *           (3) {@code currentVersionFlag} is {@code "1"}.
	 *           When neither a user id nor any role is supplied, nothing matches.
	 *           Term queries are not analyzed.
	 *           NOTE(review): the permission fields are indexed as analyzed text, so
	 *           {@code userId} / role values presumably must equal the tokens the
	 *           analyzer produced at index time - confirm against IKAnalyzer.
	 * @param keywords term to look for
	 * @param pageNum 1-based page number
	 * @param pageSize number of entries per page
	 * @param userId id of the searching user; {@code null} adds no user clause
	 * @param roleNoList role numbers of the searching user; {@code null} is treated as empty
	 * @param fieldNames fields to search for the keyword
	 * @return the requested page of results together with the hit count
	 * @throws IllegalArgumentException if {@code pageNum} or {@code pageSize} is less than 1
	 * @throws ArithmeticException if the page window does not fit in an {@code int}
	 * @throws Exception if the index cannot be opened or read, or highlighting fails
	 * @author 肖荣辉
	 * @date 2021-07-01
	 */
	public static CommonResult<CommonPage<FileInfoDTO>> multiFieldTermPermissionSearchWithHightlight(String keywords  , int pageNum , int pageSize , String userId , List<String> roleNoList  , String... fieldNames) throws Exception {

		// Non-positive paging values used to produce a negative array index or an
		// opaque error from the searcher; reject them up front.
		if (pageNum < 1 || pageSize < 1) {
			throw new IllegalArgumentException("pageNum and pageSize must be >= 1, got pageNum=" + pageNum + ", pageSize=" + pageSize);
		}

		// [start, end) is the window of the requested page within the ranked hits.
		// The exact arithmetic traps overflow instead of silently wrapping around.
		int start = Math.multiplyExact(pageNum - 1, pageSize);
		int end = Math.addExact(start, pageSize);

		// Permission part: user id OR any of the roles.
		BooleanQuery.Builder permissionBuilder = new BooleanQuery.Builder();

		if (userId != null) {
			permissionBuilder.add(new TermQuery(new Term("readPermissionUserIds" , userId)) , BooleanClause.Occur.SHOULD);
		}

		if (roleNoList != null) {
			for (String roleNo : roleNoList) {
				permissionBuilder.add(new TermQuery(new Term("readPermissionRoleNos" , roleNo)) , BooleanClause.Occur.SHOULD);
			}
		}

		BooleanQuery permissionQuery = permissionBuilder.build();

		// Keyword part: the term in any of the requested fields.
		BooleanQuery.Builder keywordBuilder = new BooleanQuery.Builder();

		for (String fieldName : fieldNames) {

			// Permission fields are never searched for the keyword.
			if ("readPermissionRoleNos".equals(fieldName) || "readPermissionUserIds".equals(fieldName)) {
				continue;
			}

			keywordBuilder.add(new TermQuery(new Term(fieldName , keywords)) , BooleanClause.Occur.SHOULD);
		}

		BooleanQuery keywordQuery = keywordBuilder.build();

		// permission AND keyword AND current version.
		// The permission clauses must live in their own nested query that is itself
		// required: SHOULD clauses placed next to MUST clauses in the same BooleanQuery
		// are optional, which previously let documents through without any permission match.
		BooleanQuery query = new BooleanQuery.Builder()
				.add(permissionQuery , BooleanClause.Occur.MUST)
				.add(keywordQuery , BooleanClause.Occur.MUST)
				.add(new TermQuery(new Term("currentVersionFlag" , "1")) , BooleanClause.Occur.MUST)
				.build();

		// Directory, reader and analyzer were never closed before; try-with-resources
		// releases all three on every path.
		try (Directory directory = FSDirectory.open(new File(REPOSITORY_DATASTORE_INDEX_HOME).toPath());
				IndexReader reader = DirectoryReader.open(directory);
				IKAnalyzer ikAnalyzer = new IKAnalyzer()) {

			IndexSearcher searcher = new IndexSearcher(reader);

			// Fetch the top [0, end) hits; the page is the tail of that range.
			TopDocs topDocs = searcher.search(query, end);

			ScoreDoc[] scoreDocs = topDocs.scoreDocs;

			// NOTE(review): depending on TotalHits.relation this may be a lower bound
			// rather than the exact count - confirm for the Lucene version in use.
			int totalHits = (int) topDocs.totalHits.value;

			// Bound the window by what was actually returned, not by the reported total.
			int last = Math.min(end, scoreDocs.length);

			List<FileInfoDTO> fileInfoDTOList = new ArrayList<FileInfoDTO>();

			// Highlight with the keyword query only, so that user ids, role numbers and
			// the version flag are never marked up inside file names or content.
			Highlighter highlighter = createSpanHighlighter(keywordQuery);

			for (int i = start; i < last; i++) {

				ScoreDoc scoreDoc = scoreDocs[i];

				Document doc = reader.document(scoreDoc.doc);

				FileInfoDTO dto = new FileInfoDTO();

				dto.setId(doc.get("id"));
				dto.setFileName(doc.get("fileName"));
				dto.setSize(doc.get("size"));
				dto.setReadPermissionRoleNos(doc.get("readPermissionRoleNos"));
				dto.setReadPermissionUserIds(doc.get("readPermissionUserIds"));
				dto.setMimeType(doc.get("mimeType"));
				dto.setSuffix(doc.get("suffix"));
				dto.setContent(doc.get("content"));
				dto.setScore(scoreDoc.score);

				// A stored value may be absent; highlight only what exists.
				String content = doc.get("content");

				if (content != null) {
					String highlightContent = highlighter.getBestFragment(ikAnalyzer, "content", content);
					// No fragment means no term matched in this field: fall back to the plain text.
					dto.setHighlightContent(highlightContent == null ? content : highlightContent);
				}

				String fileName = doc.get("fileName");

				if (fileName != null) {
					String highlightFileName = highlighter.getBestFragment(ikAnalyzer, "fileName", fileName);
					dto.setHighlightFileName(highlightFileName == null ? fileName : highlightFileName);
				}

				fileInfoDTOList.add(dto);
			}

			return CommonResult.success(CommonPage.create(pageNum , pageSize , fileInfoDTOList , totalHits));
		}
	}
	
	
	/**
	 * @function Test helper: fills a {@link FileInfoDTO} from the given values, marks it
	 *           as the current version ({@code "1"}), indexes it through
	 *           {@link #createIndex(FileInfoDTO)} and returns it. Intended for testing only.
	 * @param id unique id of the file
	 * @param fileName file name
	 * @param content file content
	 * @param readPermissionRoleNos value stored in the role read-permission field
	 * @param readPermissionUserIds value stored in the user read-permission field
	 * @param suffix file suffix
	 * @return the DTO that was indexed
	 * @throws Exception if indexing fails
	 * @author 肖荣辉
	 * @date 2021-07-02
	 */
	private static FileInfoDTO createIndexByTestDTO(String id , String fileName  , String content ,  String readPermissionRoleNos ,   String readPermissionUserIds , String suffix) throws Exception {

		final FileInfoDTO sample = new FileInfoDTO();

		// Identity and descriptive data.
		sample.setId(id);
		sample.setFileName(fileName);
		sample.setContent(content);
		sample.setSuffix(suffix);

		// Read permissions.
		sample.setReadPermissionUserIds(readPermissionUserIds);
		sample.setReadPermissionRoleNos(readPermissionRoleNos);

		// Test entries are always the current version.
		sample.setCurrentVersionFlag("1");

		createIndex(sample);

		return sample;
	}
	
	/**
	 * Demo entry point: indexes six sample files and then runs a permission-aware
	 * highlighted search, printing the result to standard output.
	 */
	public static void main(String[] args) throws Exception {

		// One row per sample file: id, fileName, content, two role numbers, two user ids, suffix.
		String[][] samples = {
				{"1", "张三丰.jpg", "这是一个武林宗师", "role1", "role21", "800", "901", "jpg"},
				{"3", "蜀国刘备.doc", "美丽的蜀国上海滩", "role2", "role23", "801", "901", "doc"},
				{"2", "蜀国马超.gif", "回马枪 , 大军来到", "role1", "role22", "800", "905", "gif"},
				{"4", "蚂蚁.png", "威武的大剑", "role4", "role24", "801", "908", "png"},
				{"5", "曹操.pdf", "金光闪闪的蜀国装备", "role3", "role22", "803", "901", "pdf"},
				{"6", "香蕉.doc", "蜀国的香肠", "role2", "role26", "808", "901", "doc"},
		};

		for (String[] sample : samples) {
			// Permissions are stored as JSON array strings.
			String roleJson = JSON.toJSONString(Arrays.asList(sample[3] , sample[4]));
			String userJson = JSON.toJSONString(Arrays.asList(sample[5] , sample[6]));
			createIndexByTestDTO(sample[0], sample[1], sample[2], roleJson , userJson , sample[7]);
		}

		// The user has roles role22 and role26 and id 801; search fileName and content
		// for the keyword "蜀国".
		List<String> userRoles = Arrays.asList("role22" , "role26");
		CommonResult<CommonPage<FileInfoDTO>> searchResult = multiFieldTermPermissionSearchWithHightlight("蜀国", 1 , 10, "801", userRoles, "fileName" , "content");

		printFileInfoDTORst(searchResult);

	}
	
	
	/**
	 * Transfer object describing one indexed file. It is the input of
	 * {@code createIndex} and the element type of the highlighted search results.
	 * Accessors are generated by Lombok's {@code @Data}.
	 */
	@Data
	public static class FileInfoDTO implements Serializable {

		private static final long serialVersionUID = 1L;
		
		// Unique identifier of the file; used as the key of the index entry.
		private String id;
		
		// File name.
		private String fileName;
		
		// File content (if it is a text file).
		private String content;
		
		// File size (bytes), kept as a string.
		private String size;
		
		// JSON array string of the role numbers that have read permission on the file.
		private String readPermissionRoleNos;
		
		// JSON array string of the user ids that have read permission on the file.
		private String readPermissionUserIds;
		
		// MIME type of the file.
		private String mimeType;
		
		// File suffix (extension).
		private String suffix;
		
		// Search score of the file.
		private double score;
		
		// File content with the matched keywords highlighted.
		private String highlightContent;
		
		// File name with the matched keywords highlighted.
		private String highlightFileName;
		
		// Whether this is the current version (0 : no , 1 : yes).
		private String currentVersionFlag;
		
	}
	
	
	
	
}
